package com.zyx.javademo.jsoup;

import org.jsoup.Connection;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.select.Elements;

import java.io.InputStream;
import java.net.URL;

/**
 * Introductory jsoup example: downloads a single page, parses it, and prints the
 * contents of its {@code <pre>} elements first as plain text and then as HTML.
 *
 * @author Yaxi.Zhang
 * @since 2021/8/6 13:49
 */
public class JsoupBasicDemo {
    /** Address of the page to fetch; also passed to jsoup as the document's base URI. */
    private static final String TARGET_URL =
            "https://sc.hkex.com.hk/gb/www.hkex.com.hk/chi/stat/smstat/ssturnover/ncms/mshtmain_c.htm";

    /** Charset name handed to jsoup for decoding the response body. */
    private static final String PAGE_CHARSET = "GBK";

    /** Separator line printed before each output section. */
    private static final String SEPARATOR = "++++++++++++++++++++++++++++++";

    /**
     * Fetches {@link #TARGET_URL}, parses the response with jsoup and prints every
     * {@code <pre>} element to standard output, once as text and once as HTML.
     *
     * @param args command-line arguments; not used
     * @throws Exception if the URL cannot be opened or the response cannot be read
     */
    public static void main(String[] args) throws Exception {
        Document document;
        // try-with-resources guarantees the network stream is released even when
        // parsing throws. An alternative way to fetch the page is
        // Jsoup.connect(url).userAgent(...).timeout(...).get(), which also allows
        // setting a user agent and a timeout.
        try (InputStream in = new URL(TARGET_URL).openStream()) {
            document = Jsoup.parse(in, PAGE_CHARSET, TARGET_URL);
        }

        // Collect all elements with the requested tag name.
        Elements pre = document.getElementsByTag("pre");

        // Print the combined text content.
        System.out.println(SEPARATOR);
        System.out.println(pre.text());

        // Print the inner HTML.
        System.out.println(SEPARATOR);
        System.out.println(pre.html());
    }
}
